####################################################
### Make Data Objects for Exchange Data Analysis ###
####################################################

##### Objects we need to make

	# Choice object: households by plans, including uninsured (what about households with multiple plans?)
		# 1 for selected plans
		# 0 for non-selected plans in choice set
		# NA for plans not in choice set
	# Premium object: households by plans
	# Households object: household characteristics
	# Plan object: plan characteristics


##### Load Data

#memory.limit(60000)
setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data") # Office computer directory

data <- get(load("ca_enrollment_data_AUG022019")) # "Cleaned" Covered California enrollment data
households <- get(load("ca_household_data_AUG022019")) # "Cleaned" Covered California household data
#plan_data <- get(load("ca_plan_data_OCT052018")) # Covered California plan data
plan_data <- read.csv("ca_plan_data2.csv") # Covered California plan data
age_rating_factors <- read.csv("age_rating_factors.csv",row.names=1) # CCIIO default rating curve
poverty_guidelines <- read.csv("poverty_guidelines.csv",row.names=1) # Poverty guidelines
rating_areas <- read.csv("ca_rating_areas.csv",row.names = 1) # California county-rating area mapping
zip3_choices <- read.csv("zip3_choices2.csv",row.names = 1) # choice set by 3 digit zip and rating area
product_definitions <- read.csv("product_definitions.csv",row.names = 1) # definitions of column names in zip3_choices
contribution_percentages <- read.csv("contribution_percentages.csv",row.names = 1) # ACA contribution percentages
network_data <- read.csv("ca_network_data10.csv") # CA network data
rownames(network_data) <- network_data$unique_id2
netpair_data <- read.csv("pairwise_inclus.csv") # CA network data
outside_logit <- get(load("sipp_logit"))

fill_missing <- FALSE
keep_missing <- FALSE
use_mean <- TRUE
outside_sample <- TRUE
set.seed(2)

households <- households[!households$flagged,]
data <- data[!data$flagged,]

gc()

##### Predict who left market

	if(outside_sample) {
		
		# Add variables
		
			# Income brackets
			households$FPL_bracket <- "138orless"
			households[households$FPL > 1.38 & households$FPL <= 2.50,"FPL_bracket"] <- "138to250"
			households[households$FPL > 2.50 & households$FPL <= 4,"FPL_bracket"] <- "250to400"
			households[households$FPL > 4,"FPL_bracket"] <- "400ormore"
		
			# Age Brackets
			households$perc_18to34 <- households$perc_18to25 + households$perc_26to34
			households$perc_35to54 <- households$perc_35to44 + households$perc_45to54
			
		# Determine in/out of market
		households$out_of_market <- 0
		out_of_market_predictions <- as.numeric(predict(outside_logit,newdata=households[is.na(households$plan_number_nocsr),],type="response") >=
			runif(nrow(households[is.na(households$plan_number_nocsr),])))
		households[is.na(households$plan_number_nocsr),"out_of_market"] <- out_of_market_predictions
		
		# Remove records from market - About 26.8% of uninsured records are kept 
		households <- households[households$out_of_market == 0,]
		data <- data[data$household_year %in% rownames(households),]
	}
	
	gc()

##### Take Sample (5% of households, not records)
	
	set.seed(6)
	sample_size <- round(.05 * length(unique(households$household_id)))
	keep_household_ids <- sort(sample(unique(households$household_id),size=sample_size,replace=FALSE))
	keep_records <- which(households$household_id %in% keep_household_ids) 
	households <- households[keep_records,]
	data <- data[data$household_year %in% rownames(households),]	

	# for fill missing, let's discard 2019 records
	households <- households[households$year < 2019,]
	data <- data[data$year < 2019,]

##### Choice Matrix (households by plans)
	# 1 if household selected plan, 0 if household did not select plan, NA if plan not in choice set
	# Complication: households that selected multiple plans
	
	# NOTE: I am reordering plan data because I defined a plan_id number earlier based on a previous ordering
	plan_data$Plan_ID_Order <- as.character(plan_data$Plan_ID_Order)
	rownames(plan_data) <- paste(plan_data$Plan_Name,plan_data$ENROLLMENT_YEAR,
		plan_data$region,sep="")
	plan_data <- plan_data[plan_data$Plan_ID_Order,]
	
	full_plan_names <- sort(unique(plan_data$Plan_Name))
	full_plan_numbers <- 1:length(full_plan_names)
	names(full_plan_numbers) <- full_plan_names
	plan_data$full_plan_number <- full_plan_numbers[plan_data$Plan_Name]
	
	plan_names <- sort(unique(plan_data$Plan_Name2))
	plan_numbers <- 1:length(plan_names)
	names(plan_numbers) <- plan_names
	#data$plan_number <- NA
	#data[!is.na(data$plan_id),"plan_number"] <- plan_numbers[data[!is.na(data$plan_id),"plan_name"]]
	data[is.na(data$plan_id),"plan_number"] <- max(data$plan_number,na.rm=TRUE) + 1
	plan_data$plan_number <- plan_numbers[plan_data$Plan_Name2]
	
	# Assign an integer to each plan (eliminating small plans
	plan_names_small <- sort(unique(plan_data$Plan_Name_Small))
	plan_numbers_small <- 1:length(plan_names_small)
	names(plan_numbers_small) <- plan_names_small
	#data$plan_number_small <- NA
	#data[!is.na(data$plan_id),"plan_number_small"] <- plan_numbers_small[data[!is.na(data$plan_id),"plan_name"]]
	#data[is.na(data$plan_id),"plan_number_small"] <- max(data$plan_number_small,na.rm=TRUE) + 1
	plan_data$plan_number_small <- plan_numbers_small[plan_data$Plan_Name_Small]
	
	# In this version, we are going to eliminate CSR, HSA, and Catastrophic plans
	plan_names_nohsacat <- sort(unique(plan_data$Plan_Name_Small_NOHSACAT))
	plan_numbers_nohsacat <- 1:length(plan_names_nohsacat)
	names(plan_numbers_nohsacat) <- plan_names_nohsacat
	plan_data$plan_number_nohsacat <- plan_numbers_nohsacat[plan_data$Plan_Name_Small_NOHSACAT]
	
	# This will link the full plan number to the nohsacat plan number
	reference_numbers <- plan_numbers
	for(j in 1:length(reference_numbers)){
		reference_plan_name <- unique(plan_data[which(plan_data$Plan_Name == names(plan_numbers)[j]),"Plan_Name_Small_NOHSACAT"]) 
		reference_numbers[j] <- plan_numbers_nohsacat[reference_plan_name]
	}
	
	full_reference_numbers <- full_plan_numbers
	for(j in 1:length(full_reference_numbers)){
		full_reference_plan_name <- unique(plan_data[which(plan_data$Plan_Name == names(full_plan_numbers)[j]),"Plan_Name_Small_NOHSACAT"]) 
		full_reference_numbers[j] <- plan_numbers_nohsacat[full_reference_plan_name]
	}
	
	
	plan_data$Issuer_Name <- as.character(plan_data$Issuer_Name)
	plan_data[plan_data$Issuer_Name == "Anthem Blue Cross","Issuer_Name"] <- "Anthem"
	plan_data[plan_data$Issuer_Name == "Blue Shield","Issuer_Name"] <- "Blue_Shield"
	plan_data[plan_data$Issuer_Name == "Chinese Community","Issuer_Name"] <- "Chinese_Community"
	plan_data[plan_data$Issuer_Name == "Contra Costa Health Plan","Issuer_Name"] <- "Contra_Costa"
	plan_data[plan_data$Issuer_Name == "Health Net","Issuer_Name"] <- "Health_Net"
	plan_data[plan_data$Issuer_Name == "LA Care","Issuer_Name"] <- "LA_Care"
	plan_data[plan_data$Issuer_Name == "UnitedHealthcare","Issuer_Name"] <- "United"
	plan_data[plan_data$Issuer_Name == "Western Health","Issuer_Name"] <- "Western"
	plan_data[plan_data$Issuer_Name == "Sharp","Issuer_Name"] <- "SHARP"
	
	plan_data[plan_data$Issuer_Name != "SHARP","Network_num"] <- 1
	
	zipchoices <- as.matrix(zip3_choices[,4:ncol(zip3_choices)])
	product_definitions$insurer <- as.character(product_definitions$insurer)
	product_definitions$plan_network_type <- as.character(product_definitions$plan_network_type)
	single_product_insurers <- c("Chinese_Community","Contra_Costa","Kaiser","LA_Care","Molina",
				"Oscar","United","Valley","Western")
	large_insurers <- c("Anthem","Blue_Shield","Kaiser","Health_Net","Molina")
	#households$zip_region_year <- paste(households$zip_3,households$rating_area,households$year,sep="_")			
				
	# Initialize choice and new plans matrix
	choices <- matrix(NA,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	new_choices <- matrix(0,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	network_breadth <- matrix(NA,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	network_inclus <- matrix(NA,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	
	# Households
	households$change_in_products <- 0
	households$change_in_insurers <- 0
	households$change_in_large_insurers <- 0
	households$market_turnover <- 0
	households$netpair_missing <- 0
	
	plan_data$Plan_Name_Small_NOHSACAT <- as.character(plan_data$Plan_Name_Small_NOHSACAT)
	
	# Calculate average net breadth and inclusivity by zip-year
	
		network_data$zip_year <- paste(network_data$zip3,network_data$year,sep="_")

		netbreadth_market_averages <- 
			by(network_data$netbreadth_zip3 * network_data$total_enrollees,network_data$zip_year,sum,na.rm=T)/
			by(network_data$total_enrollees,network_data$zip_year,sum,na.rm=T)
		netbreadth_market_averages[netbreadth_market_averages == 0] <- NA	

		netinclus_market_averages <- by(network_data$net_inclusivity_zip3 * network_data$total_enrollees,network_data$zip_year,sum,na.rm=T)/
			by(network_data$total_enrollees,network_data$zip_year,sum,na.rm=T)
		netinclus_market_averages[netinclus_market_averages == 0] <- NA	


	# Choices for the exchange population for which we have 3-digit zip (plan has to be offered in year and zip)
	for(i in rownames(zip3_choices)) {
			
			t <- zip3_choices[i,"Year"]
			region <- zip3_choices[i,"Region"]
			zip3 <- zip3_choices[i,"zip3"]
			zip_year <- paste(zip3,t,sep="_")
		
			# Get the broad product categories available (insurer/network type/etc. - no metal)
			region_year_plans <- which(plan_data$ENROLLMENT_YEAR == t &
				plan_data$region == zip3_choices[i,"Region"])
			available_products <- zipchoices[i,]
			available_products <- names(available_products)[!is.na(available_products)]
			available_insurers <- unique(product_definitions[available_products,"insurer"])
			large_available_insurers <- intersect(large_insurers,available_insurers)
			available_plans <- c()
			
			# Last year's plans
			if(t > 2014) {
				lastyr_zip <- paste(zip3_choices[i,"zip3"],zip3_choices[i,"Region"],t-1,sep="_")
				lastyr_region_year_plans <- which(plan_data$ENROLLMENT_YEAR == t-1 &
					plan_data$region == zip3_choices[lastyr_zip,"Region"])
				lastyr_available_products <- zipchoices[lastyr_zip,]
				lastyr_available_products <- names(lastyr_available_products)[!is.na(lastyr_available_products)]
				lastyr_available_insurers <- unique(product_definitions[lastyr_available_products,"insurer"])
				lastyr_large_available_insurers <- intersect(large_insurers,lastyr_available_insurers)
				lastyr_available_plans <- c()
			}
			
			# Now get the specific plans available
						
			if ("Anthem" %in% available_insurers) {
				if("Anthem_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 1)))
				}
				if("Anthem_PPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 1)))
				}
			} 
			if ("Blue_Shield" %in% available_insurers) {
				# could be HMO, EPO, PPO
				if("Blue_Shield_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
				}
				if("Blue_Shield_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
				}
				if("Blue_Shield_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
				}
			} 
			if ("Health_Net" %in% available_insurers) {
				# could be HMO, HSP, EPO, PPO
				if("Health_Net_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
				}
				if("Health_Net_HSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
				}
				if("Health_Net_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
				}
				if("Health_Net_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
				}
			}
			if ("SHARP" %in% available_insurers) {
				# could be network 1 or 2
				if("Sharp1" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
							!is.na(plan_data$Network_num))))
				}
				if("Sharp2" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
							!is.na(plan_data$Network_num))))
				}
			}
			if (any(single_product_insurers %in% available_insurers)) {
				available_plans <- c(available_plans,
					intersect(region_year_plans,
						which(plan_data$Issuer_Name %in% intersect(available_insurers,single_product_insurers))))
			}
			
			households_to_update <- which(households$zip_region_year == i & !is.na(households$zip_region_year))
			available_plan_names <- unique(as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"]))
			choices[households_to_update,available_plan_names] <- 0
			
			if(length(households_to_update) > 0) {
				available_plan_names <- unique(as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"]))
				choices[households_to_update,available_plan_names] <- 0
				
				# Add network breadth and inclusivity
				
				nondup_available_plans <- available_plans[which(!duplicated(plan_data[available_plans,"Plan_Name_NOCSR"]))]
				network_data_names <- paste(plan_data[nondup_available_plans,"Issuer_Name"],plan_data[nondup_available_plans,"PLAN_NETWORK_TYPE"],
					plan_data[nondup_available_plans,"Network_num"],plan_data[nondup_available_plans,"HSA"],
					t,as.character(plan_data[nondup_available_plans,"metal_level"]),zip3,sep="_")
				
				network_data_extract <- network_data[network_data_names,c("netbreadth_zip3","net_inclusivity_zip3","total_enrollees")]
				network_data_extract$Plan_Name_Small_NOHSACAT <- plan_data[nondup_available_plans,"Plan_Name_Small_NOHSACAT"]
				network_data_extract[is.na(network_data_extract$netbreadth_zip3),"total_enrollees"] <- 0
				
				network_breadths_unweighted <- by(network_data_extract$netbreadth_zip3,network_data_extract$Plan_Name_Small_NOHSACAT,mean,na.rm=TRUE)
				network_breadths_to_add <- 
					by(network_data_extract$netbreadth_zip3 * network_data_extract$total_enrollees,network_data_extract$Plan_Name_Small_NOHSACAT,sum,na.rm=TRUE)/
					by(network_data_extract$total_enrollees,network_data_extract$Plan_Name_Small_NOHSACAT,sum,na.rm=TRUE)
				network_breadths_to_add[is.nan(network_breadths_to_add)] <- network_breadths_unweighted[is.nan(network_breadths_to_add)]
				
					# Have a missing flag in households
					if(sum(is.na(network_breadths_to_add)) > 0) {
						households[households_to_update,"netpair_missing"] <- 1
					} else if (any(network_breadths_to_add == 0)){
						households[households_to_update,"netpair_missing"] <- 1
					}
				
					if(fill_missing & (zip_year %in% names(netbreadth_market_averages))) {
						network_breadths_to_add[is.na(network_breadths_to_add)] <- netbreadth_market_averages[zip_year]
						network_breadths_to_add[network_breadths_to_add == 0] <- netbreadth_market_averages[zip_year]
					}
				
				network_breadth[households_to_update,available_plan_names] <-
					matrix(network_breadths_to_add[available_plan_names],
						nrow=length(households_to_update),ncol=length(network_breadths_to_add),byrow=TRUE)
				
				network_inclus_unweighted <- by(network_data_extract$net_inclusivity_zip3,network_data_extract$Plan_Name_Small_NOHSACAT,mean,na.rm=TRUE)
				network_inclus_to_add <- 
					by(network_data_extract$net_inclusivity_zip3 * network_data_extract$total_enrollees,network_data_extract$Plan_Name_Small_NOHSACAT,sum,na.rm=TRUE)/
					by(network_data_extract$total_enrollees,network_data_extract$Plan_Name_Small_NOHSACAT,sum,na.rm=TRUE)
				network_inclus_to_add[is.nan(network_inclus_to_add)] <- network_inclus_unweighted[is.nan(network_inclus_to_add)]
				
					# Have a missing flag in households
					if(sum(is.na(network_inclus_to_add)) > 0) {
						households[households_to_update,"netpair_missing"] <- 1
					}
					
					if(fill_missing & (zip_year %in% names(netinclus_market_averages))) {
						network_inclus_to_add[is.na(network_inclus_to_add)] <- netinclus_market_averages[zip_year]
					}
				
				network_inclus[households_to_update,available_plan_names] <-
					matrix(network_inclus_to_add[available_plan_names],
						nrow=length(households_to_update),ncol=length(network_inclus_to_add),byrow=TRUE)
			
			}
			
				if(t > 2014) {
					if(length(available_products) > length(lastyr_available_products)) {
						households[households_to_update,"change_in_products"] <- 1 # Net entry
					} else if (length(available_products) < length(lastyr_available_products)) {
						households[households_to_update,"change_in_products"] <- -1 # Net exit
					}
					
					if(length(available_insurers) > length(lastyr_available_insurers)) {
						households[households_to_update,"change_in_insurers"] <- 1 # Net entry
					} else if (length(available_insurers) < length(lastyr_available_insurers)) {
						households[households_to_update,"change_in_insurers"] <- -1 # Net exit
					}
					
					if(length(large_available_insurers) > length(lastyr_large_available_insurers)) {
						households[households_to_update,"change_in_large_insurers"] <- 1 # Net entry
					} else if (length(large_available_insurers) < length(lastyr_large_available_insurers)) {
						households[households_to_update,"change_in_large_insurers"] <- -1 # Net exit
					}
					
					number_in_market <- sum(households[!is.na(households$plan_number_nocsr) & households$zip_region_year == i &
							!is.na(households$zip_region_year),"weight"])
					
					if(number_in_market > 0) {
						households[households_to_update,"market_turnover"] <- 
							sum(households[is.na(households$previous_plan_number) & 
								!is.na(households$plan_number_nocsr) & households$zip_region_year == i &
								!is.na(households$zip_region_year),"weight"])/number_in_market
					}	
					
				}
			
			if(t > 2014) {
				if ("Anthem" %in% lastyr_available_insurers) {
					if("Anthem_HMO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_PPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO_MSP" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 1)))
					}
					if("Anthem_PPO_MSP" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 1)))
					}
				} 
				if ("Blue_Shield" %in% lastyr_available_insurers) {
					# could be HMO, EPO, PPO
					if("Blue_Shield_HMO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
					}
					if("Blue_Shield_EPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
					}
					if("Blue_Shield_PPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
					}
				} 
				if ("Health_Net" %in% lastyr_available_insurers) {
					# could be HMO, HSP, EPO, PPO
					if("Health_Net_HMO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
					}
					if("Health_Net_HSP" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
					}
					if("Health_Net_EPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
					}
					if("Health_Net_PPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
					}
				}
				if ("SHARP" %in% lastyr_available_insurers) {
					# could be network 1 or 2
					if("Sharp1" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
								!is.na(plan_data$Network_num))))
					}
					if("Sharp2" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
								!is.na(plan_data$Network_num))))
					}
				}
				if (any(single_product_insurers %in% lastyr_available_insurers)) {
					lastyr_available_plans <- c(lastyr_available_plans,
						intersect(lastyr_region_year_plans,
							which(plan_data$Issuer_Name %in% intersect(lastyr_available_insurers,single_product_insurers))))
				}
				
				lastyr_available_plan_names <- unique(as.character(plan_data[lastyr_available_plans,"Plan_Name_Small_NOHSACAT"]))
				new_plans <- setdiff(available_plan_names,lastyr_available_plan_names)
				new_choices[households_to_update,new_plans] <- 1 

			}
	}
	
	# Restrictions on who can pick ACS plans
		# Must be subsidy eligible and have income below 250%
		# "Unenhanced silver" is not in choice set of ACS-eligibles
		# NOTE: this is not quite accurate if households have both subsidized and unsubsidized members
	
	acs_eligibles73 <- which(households$FPL > 2 & households$FPL <= 2.5 & households$subsidized_members > 0)
	acs_eligibles87 <- which(households$FPL > 1.5 & households$FPL <= 2 & households$subsidized_members > 0)
	acs_eligibles94 <- which(households$FPL <= 1.5 & households$subsidized_members > 0)
	
	# Household AV matrix 
		# Just the plan AV for non-silver plans
		# CSR plan AV for silver plans
	
	AV_matrix <- matrix(0,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Bronze"),"Plan_Name_Small_NOHSACAT"])] <- 0.6
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Gold"),"Plan_Name_Small_NOHSACAT"])] <- 0.8
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Platinum"),"Plan_Name_Small_NOHSACAT"])] <- 0.9
	
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.7
	AV_matrix[acs_eligibles73,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.73
	AV_matrix[acs_eligibles87,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.87
	AV_matrix[acs_eligibles94,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.94
		
	gc()
	
	
	# Input choice
		# Complication with households choosing multiple plans
		# Use choice of household head
	
	plan_data <- rbind(plan_data,rep(NA,dim(plan_data)[2]))
	plan_data[nrow(plan_data),"Plan_Name_Small_NOHSACAT"] <- "Uninsured"
	#plan_data[nrow(plan_data),"plan_unique_id"] <- "Uninsured"
	plan_data[nrow(plan_data),"plan_number_nohsacat"] <- ncol(choices) + 1
	plan_data[nrow(plan_data),"plan_number"] <- max(plan_data$plan_number,na.rm=T) + 1
	#rownames(plan_data)[nrow(plan_data)] <- "Uninsured"
	
	assign_head_plan <- function(x) {
		return(as.integer(x[1]))
	}
	
	assign_oldest <- function(x) {
		return(as.character(x[1]))
	}
	
	households$plan_number <- as.numeric(by(data[,"plan_number"],data[,"household_year"],assign_head_plan)[rownames(households)])
	households$plan_name <- by(data[,"plan_name"],data[,"household_year"],assign_oldest)[rownames(households)]
	households$plan_id <- as.numeric(by(data[,"plan_id"],data[,"household_year"],assign_head_plan)[rownames(households)])
	#households[households$plan_number == max(plan_data$plan_number),"plan_name"] <- "Uninsured"
	
	households$plan_number_nohsacat <- reference_numbers[households$plan_number]
	households[is.na(households$plan_id),"plan_number_nohsacat"] <- ncol(choices) + 1	
		
	choices <- cbind(choices,rep(0,nrow(choices)))
	colnames(choices) <- c(colnames(choices)[1:(ncol(choices)-1)],"Uninsured")
	choices[cbind(seq(nrow(choices)),households$plan_number_nohsacat)] <- 1
	gc()
	
	new_choices <- cbind(new_choices,rep(0,nrow(new_choices)))
	colnames(new_choices) <- c(colnames(new_choices)[1:(ncol(new_choices)-1)],"Uninsured")
	gc()
	
	AV_matrix <- cbind(AV_matrix,rep(0,nrow(AV_matrix)))
	colnames(AV_matrix) <- c(colnames(AV_matrix)[1:(ncol(AV_matrix)-1)],"Uninsured")
	gc()	
		
	network_breadth <- cbind(network_breadth,rep(0,nrow(network_breadth)))
	colnames(network_breadth) <- c(colnames(network_breadth)[1:(ncol(network_breadth)-1)],"Uninsured")
	gc()
	
	network_inclus <- cbind(network_inclus,rep(0,nrow(network_inclus)))
	colnames(network_inclus) <- c(colnames(network_inclus)[1:(ncol(network_inclus)-1)],"Uninsured")
	gc()
	
	# Number of choices in household choice set (deduct 1 because of the uninsured option)
	households$num_choices <- rowSums(!is.na(choices)) - 1
	gc()
	
##### Previous plan choices

	# Previous plan choice is already in household object
	# You need to be careful with CSR plans (shift between CSR plans shouldn't count as change
	
	households$previous_plan_number_nohsacat <- reference_numbers[households$previous_plan_number]
	previous_choices <- matrix(0,nrow(choices),ncol(choices),dimnames=list(rownames(choices),colnames(choices)))
	
	for(j in unique(households$previous_plan_number_nohsacat)) {
		households_on_j <- which(households$previous_plan_number_nohsacat == j)
		previous_choices[households_on_j,j] <- 1
	}
	
	#previous_choices <- cbind(previous_choices,NA)
	#colnames(previous_choices)[ncol(previous_choices)] <- "Uninsured"
	
##### Pairwise inclusivity object

	network_pairinclus <- matrix(0,nrow(choices),ncol(choices),dimnames=list(rownames(choices),colnames(choices)))
	
	netpair_data$plan_a <- as.character(netpair_data$plan_a)
	netpair_data$plan_b <- as.character(netpair_data$plan_b)
	
	netpair_data$plan_number_short_a <- full_reference_numbers[netpair_data$plan_a]
	netpair_data$plan_number_short_b <- full_reference_numbers[netpair_data$plan_b]
	
	netpair_data$plan_name_short_a <- names(plan_numbers_nohsacat)[netpair_data$plan_number_short_a]
	netpair_data$plan_name_short_b <- names(plan_numbers_nohsacat)[netpair_data$plan_number_short_b]
	netpair_data$perm_name_short <- paste(netpair_data$plan_name_short_a,netpair_data$plan_name_short_b,sep="_")
	
	households$plan_name2 <- plan_data[households$plan_id,"Plan_Name_NOCSR"]
	households[households$plan_name2 == "KA_G_COIN" & !is.na(households$plan_name2),"plan_name2"] <- "KA_G" 
	households[households$plan_name2 == "HN_BR_HSA" & !is.na(households$plan_name2),"plan_name2"] <- "HN_BR" 
	
	households$market_plan_id <- paste(households$zip3,households$year,households$plan_name2,sep="_")
	households[is.na(households$plan_name),"market_plan_id"] <- NA
	plan_enrollment_by_market <- by(households$weight,households$market_plan_id,sum)
	
	netpair_data$market_plan_id <- paste(netpair_data$zip3,netpair_data$year,netpair_data$plan_a,sep="_")
	netpair_data$total_enrollees <- 0
	netpair_matches <- which(netpair_data$market_plan_id %in% names(plan_enrollment_by_market))
	netpair_data[netpair_matches,"total_enrollees"] <- 
		plan_enrollment_by_market[netpair_data[netpair_matches,"market_plan_id"]]
	
	netpair_data$zip_year <- paste(netpair_data$zip3,netpair_data$year,sep="_")
	netpair_market_averages <- 
		by(netpair_data$inclusivity * netpair_data$total_enrollees,netpair_data$zip_year,sum,na.rm=T)/
		by(netpair_data$total_enrollees,netpair_data$zip_year,sum,na.rm=T)
	
	for(i in rownames(zip3_choices)) {
			
		t <- zip3_choices[i,"Year"]
		region <- zip3_choices[i,"Region"]
		zip3 <- zip3_choices[i,"zip3"]
		zip_year <- paste(zip3,t,sep="_")
		
		# Find plans available
	
			# Get the broad product categories available (insurer/network type/etc. - no metal)
			region_year_plans <- which(plan_data$ENROLLMENT_YEAR == t &
				plan_data$region == zip3_choices[i,"Region"])
			available_products <- zipchoices[i,]
			available_products <- names(available_products)[!is.na(available_products)]
			available_insurers <- unique(product_definitions[available_products,"insurer"])
			large_available_insurers <- intersect(large_insurers,available_insurers)
			available_plans <- c()
			
			# Now get the specific plans available	
			if ("Anthem" %in% available_insurers) {
				if("Anthem_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 1)))
				}
				if("Anthem_PPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 1)))
				}
			} 
			if ("Blue_Shield" %in% available_insurers) {
				# could be HMO, EPO, PPO
				if("Blue_Shield_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
				}
				if("Blue_Shield_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
				}
				if("Blue_Shield_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
				}
			} 
			if ("Health_Net" %in% available_insurers) {
				# could be HMO, HSP, EPO, PPO
				if("Health_Net_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
				}
				if("Health_Net_HSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
				}
				if("Health_Net_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
				}
				if("Health_Net_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
				}
			}
			if ("SHARP" %in% available_insurers) {
				# could be network 1 or 2
				if("Sharp1" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
							!is.na(plan_data$Network_num))))
				}
				if("Sharp2" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
							!is.na(plan_data$Network_num))))
				}
			}
			if (any(single_product_insurers %in% available_insurers)) {
				available_plans <- c(available_plans,
					intersect(region_year_plans,
						which(plan_data$Issuer_Name %in% intersect(available_insurers,single_product_insurers))))
			}
			
			available_plan_names <- unique(as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"]))
			
	
		netpair_mkt_indices <- which(netpair_data$year == t & netpair_data$zip3 == zip3 & 
			netpair_data$plan_name_short_a %in% available_plan_names & netpair_data$plan_name_short_b %in% available_plan_names) 
		netpair_mkt_data <- netpair_data[netpair_mkt_indices,]
		netpair_mkt_data <- netpair_mkt_data[!duplicated(netpair_mkt_data$perm_name_short),]
		rownames(netpair_mkt_data) <- netpair_mkt_data$perm_name_short
		
		mkt_inclusivity <- by(netpair_data[netpair_mkt_indices,"inclusivity"],netpair_data[netpair_mkt_indices,"perm_name_short"],mean,na.rm=TRUE)
		netpair_mkt_data[names(mkt_inclusivity),"inclusivity"] <- mkt_inclusivity
		
		households_in_mkt <- which(households$zip3 == zip3 & households$rating_area == region & households$year == t)
		plans_in_mkt <- unique(netpair_mkt_data$plan_number_short_a)
		
		if (length(plans_in_mkt) == 0) {
			households[households_in_mkt,"netpair_missing"] <- 1
		} else {
		
			# Now loop over each plan
			for(j in plans_in_mkt) {
			
				# Permutations for plan j
				plan_permutations <- which(netpair_mkt_data$plan_number_short_a == j)
				plan_permutation_names <- netpair_mkt_data[plan_permutations,"plan_name_short_b"]
			
				# Households that have j as previous choice
				households_to_update <- intersect(households_in_mkt,which(households$previous_plan_number_nohsacat == j))
				
				if(length(households_to_update) > 0 ) {
				
					netpair_to_add <- netpair_mkt_data[plan_permutations,"inclusivity"]
				
					# Have a missing flag in households
					if(sum(is.na(netpair_to_add)) > 0) {
						households[households_to_update,"netpair_missing"] <- 1
					}
					
					if(fill_missing & (zip_year %in% names(netpair_market_averages))) {
						netpair_to_add[is.na(netpair_to_add)] <- netpair_market_averages[zip_year]
					}
					
					# Plug into pairwise inclusivity object
					network_pairinclus[households_to_update,plan_permutation_names] <- 
						matrix(netpair_to_add,length(households_to_update),length(plan_permutation_names),byrow=T)
					
				}
			}
		}
	}
	
	
##### Premium Object

#choices <- get(load("ca_choice_matrix_FEB082016"))

	premiums <- choices
	
	catastrophic_plans <- which(plan_data$metal_level == "Minimum Coverage")
	
	# Enrollment in each plan
	enrollment_weights <- by(households[,"weight"],households[,"plan_id"],sum,na.rm=TRUE)
	plan_data$enrollment <- 0
	plan_data[as.numeric(names(enrollment_weights)),"enrollment"] <- enrollment_weights
	
	# Choices for the exchange population for which we have 3-digit zip (plan has to be offered in year and zip)
	for(i in rownames(zip3_choices)) {
		
			region_year_plans <- which(plan_data$ENROLLMENT_YEAR == zip3_choices[i,"Year"] &
				plan_data$region == zip3_choices[i,"Region"])
		
			# Get the broad product categories available (insurer/network type/etc. - no metal)
			available_products <- zipchoices[i,]
			available_products <- names(available_products)[!is.na(available_products)]
			available_insurers <- unique(product_definitions[available_products,"insurer"])
			
			# Now get the specific plans available
			
			available_plans <- c()
			if ("Anthem" %in% available_insurers) {
				if("Anthem_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 1)))
				}
				if("Anthem_PPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 1)))
				}
			} 
			if ("Blue_Shield" %in% available_insurers) {
				# could be HMO, EPO, PPO
				if("Blue_Shield_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
				}
				if("Blue_Shield_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
				}
				if("Blue_Shield_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
				}
			} 
			if ("Health_Net" %in% available_insurers) {
				# could be HMO, HSP, EPO, PPO
				if("Health_Net_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
				}
				if("Health_Net_HSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
				}
				if("Health_Net_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
				}
				if("Health_Net_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
				}
			}
			if ("SHARP" %in% available_insurers) {
				# could be network 1 or 2
				if("Sharp1" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
							!is.na(plan_data$Network_num))))
				}
				if("Sharp2" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
							!is.na(plan_data$Network_num))))
				}
			}
			if (any(single_product_insurers %in% available_insurers)) {
				available_plans <- c(available_plans,
					intersect(region_year_plans,
						which(plan_data$Issuer_Name %in% intersect(available_insurers,single_product_insurers))))
			}
			
			available_plans <- unique(available_plans)
			available_plan_names <- as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"])
			
			# These are the premiums before merging the small plans and HSA/catastrophic/bronze
			premium_plans <- plan_data[available_plans,"Premium"]/1.278
			names(premium_plans) <- available_plan_names
			
			
			# Here we need to do something about the small plans
					# Solution 1: take the minimum for small plans (what I do here)
					# Solution 2: average
					# NOTE: this ends up catching 2 Health Net plans (an HSP and PPO get merged) - no big deal
				
			# For bronze/hsa/cat - take minimum of the bronze plans	
				
				duplicate_plans <- unique(available_plan_names[duplicated(available_plan_names)])					
				for(s in duplicate_plans) {
					plan_indices <- which(available_plan_names == s)
					plans_to_merge <- available_plans[plan_indices]
					plans_to_merge <- setdiff(plans_to_merge,catastrophic_plans)
					if (use_mean & (sum(plan_data[plans_to_merge,"enrollment"]) > 0)) {
						premium_plans[plan_indices] <- weighted.mean(plan_data[plans_to_merge,"Premium"],
							plan_data[plans_to_merge,"enrollment"])/1.278
					} else if (use_mean) {
						premium_plans[plan_indices] <- mean(plan_data[plans_to_merge,"Premium"])/1.278
					} else {
						premium_plans[plan_indices] <- min(plan_data[plans_to_merge,"Premium"]/1.278)
					}
	
				}
			
			unique_available_plan_names <- which(!duplicated(available_plan_names))
			available_plan_names <- available_plan_names[unique_available_plan_names]
			premium_plans <- premium_plans[unique_available_plan_names]	
			
			households_to_update <- which(households$zip_region_year == i & !is.na(households$zip_region_year))
			
			premiums[households_to_update,available_plan_names] <- households[households_to_update,"rating_factor"] %*% t(premium_plans)
	}

	gc()
	
	# Insert Mandate Penalty as the Price of Being Uninsured
	# I am converting this penalty to a monthly basis 
	premiums[,"Uninsured"] <- households$penalty/12
	gc()
	
	# Apply Subsidy to all plans except catastrophic
	#catastrophic_plans <- unique(plan_data[plan_data$metal_level == "Minimum Coverage","Plan_Name_Small"])
	#noncatastrophic_plans <- which(!colnames(premiums) %in% catastrophic_plans)
	subsidies <- households$subsidy
	subsidies[is.na(subsidies)] <- 0
	
	unsubsidized_premiums <- premiums
	gc()
	premiums <- pmax(premiums - subsidies,0)
	gc()
	
	# Set premiums of plans not in choice set to NA
	premiums <- premiums * pmax(pmin(choices,1),1)	
	gc()
	subsidies <- unsubsidized_premiums - premiums
	gc()
	rm(unsubsidized_premiums)
	gc()
	subsidies[,ncol(subsidies)] <- 0
	gc()

	if (fill_missing) {
	
		save(premiums,file="ca_premium_matrix_JUN182021_net_fillmissing")		
		save(choices,file="ca_choice_matrix_JUN182021_net_fillmissing")	
		save(new_choices,file="ca_new_choice_matrix_JUN182021_net_fillmissing")	
		save(previous_choices,file="ca_previous_choice_matrix_JUN182021_net_fillmissing")	
		save(subsidies,file="ca_subsidy_matrix_JUN182021_net_fillmissing")
		save(network_breadth,file="ca_network_breadth_matrix_JUN182021_net_fillmissing")	
		save(network_inclus,file="ca_network_inclus_matrix_JUN182021_net_fillmissing")	
		save(network_pairinclus,file="ca_network_pairinclus_matrix_JUN182021_net_fillmissing")	
		save(AV_matrix,file="ca_AV_matrix_JUN182021_net_fillmissing")	
	
	} else if (keep_missing) {
	
		save(premiums,file="ca_premium_matrix_JUN182021_net_keepmissing")		
		save(choices,file="ca_choice_matrix_JUN182021_net_keepmissing")	
		save(new_choices,file="ca_new_choice_matrix_JUN182021_net_keepmissing")	
		save(previous_choices,file="ca_previous_choice_matrix_JUN182021_net_keepmissing")	
		save(subsidies,file="ca_subsidy_matrix_JUN182021_net_keepmissing")
		save(network_breadth,file="ca_network_breadth_matrix_JUN182021_net_keepmissing")	
		save(network_inclus,file="ca_network_inclus_matrix_JUN182021_net_keepmissing")	
		save(network_pairinclus,file="ca_network_pairinclus_matrix_JUN182021_net_keepmissing")	
		save(AV_matrix,file="ca_AV_matrix_JUN182021_net_keepmissing")	
	
	} else {

		keep_indices <- which(households$netpair_missing == 0)
		households <- households[keep_indices,]
		data <- data[data$household_year %in% rownames(households),]	
		
		premiums <- premiums[keep_indices,]
		choices <- choices[keep_indices,]
		new_choices <- new_choices[keep_indices,]
		previous_choices <- previous_choices[keep_indices,]
		subsidies <- subsidies[keep_indices,]
		network_breadth <- network_breadth[keep_indices,]
		network_inclus <- network_inclus[keep_indices,]
		network_pairinclus <- network_pairinclus[keep_indices,]
		AV_matrix <- AV_matrix[keep_indices,]

		save(premiums,file="ca_premium_matrix_JUN182021_net")		
		save(choices,file="ca_choice_matrix_JUN182021_net")	
		save(new_choices,file="ca_new_choice_matrix_JUN182021_net")	
		save(previous_choices,file="ca_previous_choice_matrix_JUN182021_net")	
		save(subsidies,file="ca_subsidy_matrix_JUN182021_net")
		save(network_breadth,file="ca_network_breadth_matrix_JUN182021_net")	
		save(network_inclus,file="ca_network_inclus_matrix_JUN182021_net")	
		save(network_pairinclus,file="ca_network_pairinclus_matrix_JUN182021_net")	
		save(AV_matrix,file="ca_AV_matrix_JUN182021_net")	
	}	
	
rm(subsidies)
rm(premiums)
rm(previous_choices)
rm(new_choices)
rm(network_breadth)
rm(network_inclus)
gc()
	
##### Households Object

	compute.number.unique <- function(x) {
		return(length(unique(x)))
	}	

	#households[households$plan_name == "Uninsured","plan_name"] <- NA	
	households[is.na(households$previous_plan_offered),"previous_plan_offered"] <- 0	
	households$enrolled_beg_year <- as.numeric(is.na(households$previous_plan_offered) & (households$SEP == 0 & !is.na(households$SEP)))
	
	# Service Channel
		
		data$channel <- as.character(data$service_channel)
		data[data$channel %in% c("CIA","PBE"),"channel"] <- "Insurance Agent"
		data[data$channel %in% c("SCR","CEW","CEC"),"channel"] <- "Other Assistance"
		data[is.na(data$plan_id),"channel"] <- NA
	
		assign_service_channel <- function(x) {
			unique_channel <- as.character(unique(x))
			if(length(unique_channel) == 1) {
				return(unique_channel)
			} else if ("Insurance Agent" %in%  x) {
				return("Insurance Agent")
			} else if ("Other Assistance" %in%  x) {
				return("Other Assistance")
			} else {
				return("Unassisted")
			}
		}
	
		service_channels <- by(data[!is.na(data$channel),"channel"],data[!is.na(data$channel),"household_year"],assign_service_channel)
		households$channel <- NA
		households[names(service_channels),"channel"] <- service_channels 
	
	# Language spoken
	
		data$language <- as.character(data$language_spoken)
		data[data$language == "English","language"] <- "English"
		data[data$language == "Spanish","language"] <- "Spanish"
		data[!data$language %in% c("(nonres","English","Spanish"),"language"] <- "Other Language"
		data[data$language == "(nonres","language"] <- NA
	
		assign_language <- function(x) {
			unique_channel <- as.character(unique(x))
			if(length(unique_channel) == 1) {
				return(unique_channel)
			} else if ("English" %in%  x) {
				return("English")
			} else if ("Spanish" %in%  x) {
				return("Spanish")
			} else {
				return("Other Language")
			}
		}
	
		languages <- by(data[!is.na(data$language),"language"],data[!is.na(data$language),"household_year"],assign_language)
		households$language <- NA
		households[names(languages),"language"] <- languages 
	
	# Mean age
	mean_ages <- by(data$AGE,data$household_year,mean)
	households[names(mean_ages),"mean_age"] <- mean_ages
	
	# SEP - make sure no NA values (could be NA if uninsured)
	households[is.na(households$plan_id),"SEP"] <- 0
	
	# SLC_contribution - make sure no NA values
	households[is.na(households$SLC_contribution),"SLC_contribution"] <- 
		households[is.na(households$SLC_contribution),"premiumSLC"] 
	
	fields <- sort(c("plan_name","plan_id","household_id","plan_number","FPL","household_size","enrollees","out_of_market",
		"perc_0to17","perc_18to25","perc_26to34","perc_35to44","perc_45to54","perc_55to64","perc_65plus","perc_asian","perc_black","perc_hispanic","perc_other","perc_white", 
		"perc_male","zip3","zip_region_year","rating_area","year","rating_factor","subsidy","subsidized_members","premiumSLC","SLC_contribution",
		"penalty","weight","exempt.belowfiling","exempt.unaffordable",
		"dep_midyear","plan_number_nocsr","previous_plan_number","previous_plan_offered","SEP","enrolled_beg_year",
		"channel","language","mean_age","num_choices","netpair_missing"))	
		
	# Save Data
	households <- households[,fields]
	gc()
	
	if (fill_missing) {
		save(households,file="ca_household_characteristics_JUN182021_net_fillmissing")	
		write.csv(households,file="ca_household_characteristics_JUN182021_net_fillmissing.csv",row.names=FALSE)		
	} else if (keep_missing) {
		save(households,file="ca_household_characteristics_JUN182021_net_keepmissing")	
		write.csv(households,file="ca_household_characteristics_JUN182021_net_keepmissing.csv",row.names=FALSE)		
	} else {
		save(households,file="ca_household_characteristics_JUN182021_net")	
		write.csv(households,file="ca_household_characteristics_JUN182021_net.csv",row.names=FALSE)	
	}
	
##### Instruments

	plan_data$Insurer <- plan_data$Issuer_Name
	plan_data$Issuer_Name <- NULL

	plan_data$Year <- plan_data$ENROLLMENT_YEAR
	plan_data$ENROLLMENT_YEAR <- NULL
	
	plan_data$Metal_Level <- plan_data$metal_level
	plan_data$metal_level <- NULL
	
	# Hausman Instruments
		# Use premiums charged by the firm in other markets for the same metal tier plan
		# If firm only offers plans in 1 rating area, set instrument equal to 0
		# First add instrument to the plan object
		# Then create I * J matrix of instruments
	
	plan_data$Plan_Name_Small <- as.character(plan_data$Plan_Name_Small)
	plan_data$Insurer <- as.character(plan_data$Insurer)
	plan_data$Metal_Level <- as.character(plan_data$Metal_Level)
	
	compute_Hausman_instruments <- function(x,plan_data) {
		plan_to_check <- plan_data[x,]
		plans_other_markets <- rownames(plan_data[plan_data$Plan_Name_Small == plan_to_check$Plan_Name_Small & plan_data$Rating_Area != plan_to_check$Rating_Area &
			plan_data$Year == plan_to_check$Year & plan_data$Metal_Level == plan_to_check$Metal_Level,])
		
		if(length(plans_other_markets) == 0) {
			return(0)
		} else {
			return(mean(plan_data[plans_other_markets,"Premium"],na.rm=TRUE))
		}
	}
	
	plan_data$Hausman_instrument <- sapply(rownames(plan_data),FUN = compute_Hausman_instruments,plan_data)
	plan_data["Uninsured","Hausman_instrument"] <- 0
	plan_data$Hausman_missing <- as.numeric(plan_data$Hausman_instrument == 0)
	
	# BLP instruments
		# For a given plan j, use sum of non-premium plan characteristics for all other plans offered by the firm selling plan j
		# For a given plan j, use sum of non-premium plan characteristics for plans offered by competitors
	
	characteristics <- c("HMO","HSA","AV","MSP") 
	
		# HMO
		plan_data$HMO <- as.numeric(plan_data$PLAN_NETWORK_TYPE == "HMO")
	
		# AV - minimum coverage is an approximation
		AVs <- c(0.55,0.6,0.7,0.8,0.9,0.73,0.87,0.94)
		names(AVs) <- c("Minimum Coverage","Bronze","Silver","Gold","Platinum","Silver - Enhanced 73","Silver - Enhanced 87","Silver - Enhanced 94")
		plan_data$AV <- AVs[as.character(plan_data$Metal_Level)]
	
	compute_BLP_instruments <- function(x,plan_data,characteristic,own=FALSE) {
		plan_to_check <- plan_data[x,]
		
		if(own) {
			comparison_plans <- setdiff(rownames(plan_data[plan_data$Insurer == plan_to_check$Insurer & 
				plan_data$Rating_Area == plan_to_check$Rating_Area & plan_data$Year == plan_to_check$Year & 
				!is.na(plan_data$Insurer),]),rownames(plan_to_check))
		} else {
			comparison_plans <- rownames(plan_data[plan_data$Insurer != plan_to_check$Insurer & 
				plan_data$Rating_Area == plan_to_check$Rating_Area & plan_data$Year == plan_to_check$Year & 
				!is.na(plan_data$Insurer),])	
		}
		
		return(mean(plan_data[comparison_plans,characteristic]))
	}
	
	plan_data$HSA_own <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "HSA",own=TRUE)
	plan_data$HSA_other <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "HSA",own=FALSE)
	plan_data$HMO_own <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "HMO",own=TRUE)
	plan_data$HMO_other <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "HMO",own=FALSE)
	plan_data$AV_own <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "AV",own=TRUE)
	plan_data$AV_other <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "AV",own=FALSE)
	plan_data$MSP_own <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "MSP",own=TRUE)
	plan_data$MSP_other <- sapply(rownames(plan_data),FUN = compute_BLP_instruments,plan_data,characteristic = "MSP",own=FALSE)
	
	plan_data$Small_Insurer <- 0
	plan_data[!plan_data$Insurer %in% c("Anthem","Blue_Shield","Health_Net","Kaiser"),"Small_Insurer"] <- 1
	
	plan_data$Anthem <- as.numeric(plan_data$Insurer == "Anthem")
	plan_data$Blue_Shield <- as.numeric(plan_data$Insurer == "Blue_Shield")
	#plan_data$Chinese_Community <- as.numeric(plan_data$Insurer == "Chinese_Community")
	#plan_data$Contra_Costa <- as.numeric(plan_data$Insurer == "Contra_Costa")
	plan_data$Health_Net <- as.numeric(plan_data$Insurer == "Health_Net")
	plan_data$Kaiser <- as.numeric(plan_data$Insurer == "Kaiser")
	#plan_data$LA_Care <- as.numeric(plan_data$Insurer == "LA_Care")
	#plan_data$Molina <- as.numeric(plan_data$Insurer == "Molina")
	#plan_data$Oscar <- as.numeric(plan_data$Insurer == "Oscar")
	#plan_data$Sharp <- as.numeric(plan_data$Insurer == "SHARP")
	#plan_data$United <- as.numeric(plan_data$Insurer == "United")
	#plan_data$Valley <- as.numeric(plan_data$Insurer == "Valley")
	#plan_data$Western_Health <- as.numeric(plan_data$Insurer == "Western")
	
	plan_data$Catastrophic <- as.numeric(plan_data$Metal_Level == "Minimum Coverage")
	plan_data$Bronze <- as.numeric(plan_data$Metal_Level == "Bronze")
	plan_data$Gold <- as.numeric(plan_data$Metal_Level == "Gold")
	plan_data$Platinum <- as.numeric(plan_data$Metal_Level == "Platinum")
	plan_data$Silver <- as.numeric(plan_data$Metal_Level %in% c("Silver","Silver - Enhanced 73","Silver - Enhanced 87","Silver - Enhanced 94"))
	
	# Change to 21-year old Premium
	plan_data$Premium <- plan_data$Premium/1.278
	
##### Plan Object

julia_fields <- c("Plan_Name_Small_NOHSACAT","Insurer","Metal_Level","Premium","HSA",               
					"MSP","HMO","AV",
					"Anthem","Blue_Shield","Health_Net","Kaiser","Small_Insurer",
					"Catastrophic","Bronze","Silver","Gold","Platinum")      
fields <- c(julia_fields,"HSA_own","HSA_other","HMO_own","HMO_other","AV_own","AV_other","Rating_Area","Year")
					
julia_plan_data <- plan_data[!duplicated(plan_data$Plan_Name_Small_NOHSACAT),julia_fields]
julia_plan_data <- julia_plan_data[1:(nrow(julia_plan_data)-1),]
julia_plan_data <- julia_plan_data[!julia_plan_data$Plan_Name_Small_NOHSACAT == "Uninsured",] 
rownames(julia_plan_data) <- julia_plan_data$Plan_Name_Small_NOHSACAT
			
julia_plan_data$Plan_Name <- julia_plan_data$Plan_Name_Small_NOHSACAT	
julia_plan_data$Plan_Name_Small_NOHSACAT <- NULL
	
save(plan_data,file="ca_plan_characteristics_JUN182021_net")	
write.csv(julia_plan_data,file="ca_plan_characteristics_JUN182021_net.csv",row.names=FALSE)
	